import pandas as pd
import numpy as np

def calculate_num_valid_comparisons_and_absolute_differences(df):
  """Counts pairwise-valid rows and computes the mean absolute difference per column pair.

  For every column pair (i, j) with i < j, a row counts as a valid comparison
  when both values are finite, i.e. neither NaN nor +/-inf. Only the strict
  upper triangle of each result matrix is filled in; the diagonal and the
  lower triangle keep their initial value of 0.

  Args:
    df: A Pandas DataFrame. np.isfinite is applied to every column, so the
      columns are expected to hold numeric data.

  Returns:
    A tuple (num_valid_comparisons, absolute_differences) of two NumPy arrays
    of shape (n_columns, n_columns). Entry [i, j] (i < j) of the first array
    is the number of rows where both columns are finite; entry [i, j] of the
    second is the mean of |col_i - col_j| over those rows, or NaN when there
    are no such rows.
  """
  n_cols = df.shape[1]

  # Result matrices start at zero; only entries above the diagonal are written.
  num_valid_comparisons = np.zeros((n_cols, n_cols))
  absolute_differences = np.zeros((n_cols, n_cols))

  # With fewer than two columns there are no pairs to compare.
  if n_cols < 2:
    return num_valid_comparisons, absolute_differences

  # Extract each column and its finite mask once instead of once per pair.
  columns = [df.iloc[:, k].values for k in range(n_cols)]
  finite_masks = [np.isfinite(values) for values in columns]

  # Iterate over all column pairs (i < j).
  for i in range(n_cols):
    for j in range(i + 1, n_cols):
      # Rows usable for this pair: both values must be finite.
      both_finite = np.logical_and(finite_masks[i], finite_masks[j])
      n_valid = np.count_nonzero(both_finite)
      num_valid_comparisons[i, j] = n_valid

      if n_valid == 0:
        # No overlapping finite rows: the mean difference is undefined.
        absolute_differences[i, j] = np.nan
      else:
        # Select the valid rows before subtracting so that excluded values
        # (e.g. inf - inf) never enter the arithmetic and trigger warnings.
        diffs = columns[i][both_finite] - columns[j][both_finite]
        absolute_differences[i, j] = np.mean(np.abs(diffs))

  # Return the number of valid comparisons and the mean absolute differences.
  return num_valid_comparisons, absolute_differences

def main():
  """Writes pairwise valid-comparison counts and mean absolute differences to two NxN CSV files.

  Reads a CSV file into a DataFrame, computes both matrices with
  calculate_num_valid_comparisons_and_absolute_differences, rounds them to
  4 decimal places and saves each one as a comma-separated file in the
  current working directory.

  Usage:
    python calculate_num_valid_comparisons_and_absolute_differences.py [csv_file.csv]

  When no CSV path is given on the command line, the default input file
  'GSE167978_H02_all31k_400inexconly_noheader.csv' is read. The two output
  file names are fixed regardless of the input file.
  """
  # Local import: only the script entry point needs access to argv.
  import sys

  # Use the CSV path from the command line when provided, else the default file.
  if len(sys.argv) > 1:
    csv_file = sys.argv[1]
  else:
    csv_file = 'GSE167978_H02_all31k_400inexconly_noheader.csv'

  # Read the CSV file into a Pandas DataFrame.
  # NOTE(review): the default file name says "noheader", but read_csv's default
  # treats the first row as column labels, so that row is not part of the
  # data - confirm whether header=None is intended.
  df = pd.read_csv(csv_file)

  # Calculate the number of valid comparisons and the absolute difference between columns.
  num_valid_comparisons, absolute_differences = calculate_num_valid_comparisons_and_absolute_differences(df)

  # Round both matrices to 4 decimal places (not significant digits) and
  # convert them to strings so they are written exactly as rounded.
  num_valid_comparisons_strings = np.around(num_valid_comparisons, decimals=4).astype(str)
  absolute_differences_strings = np.around(absolute_differences, decimals=4).astype(str)

  # Output the number of valid comparisons and the absolute difference between columns to two NXN CSV files.
  np.savetxt('num_valid_comparisons_with_4_significant_digitsGSE167978_H02.csv', num_valid_comparisons_strings, delimiter=',', fmt="%s")
  np.savetxt('absolute_differences_with_4_significant_digitsGSE167978_H02.csv', absolute_differences_strings, delimiter=',', fmt="%s")

if __name__ == '__main__':
  main()

  # Print a message to the console. This lives inside the guard so the
  # message only appears after main() has actually run, not when the module
  # is merely imported.
  print("Output file saved to the working directory.")
